# Copyright (c) 2021 changwei@iscas.ac.cn
# 
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
# 
# 1. Redistributions of source code must retain the above copyright notice, this list of
#    conditions and the following disclaimer.
# 
# 2. Redistributions in binary form must reproduce the above copyright notice, this list
#    of conditions and the following disclaimer in the documentation and/or other materials
#    provided with the distribution.
# 
# 3. Neither the name of the copyright holder nor the names of its contributors may be used
#    to endorse or promote products derived from this software without specific prior written
#    permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
import codecs
import json
import os
import re
import shutil

# Index of the markdown documents, loaded from the JSON index file by
# copy_md_main(). Stays None until that function has run.
MD_DOC_INDEX = None

# Earlier attempts at the markdown-link pattern, kept to explain the current one:
#   \[.*\]\((.*\.md)     misfires on: [xxx](xxx.md) xxx [xxx](xxx.md)
#   \[.*?\]\((.*?\.md)   misfires on: [xxx](https://xxx.com/xxx) xxx [xxx](xxx.md)
# Raw strings are used so that "\[", "\(" and "\." reach the regex engine
# without relying on Python's handling of invalid string escape sequences.
MD_LINK_REGEX = re.compile(r'\[[^\[]*?\]\(([^\[]*?\.md)')
MD_HTML_LINK_REGEX = re.compile(r'<a href="([^<]*\.md)">')
# NOTE(review): the image patterns are greedy, so when several images of the
# same type share one line only the last one is captured.
GIF_LINK_REGEX = re.compile(r'!\[.*\]\((.*\.gif)')
JPG_LINK_REGEX = re.compile(r'!\[.*\]\((.*\.jpg)')
PNG_LINK_REGEX = re.compile(r'!\[.*\]\((.*\.png)')

modified_list = []  # one record per link that was rewritten
not_modified_list = []  # one record per link that was ignored or whose line was dropped


def resolve_md_links(md_text: str, in_md_abs_path: str, out_md_abs_path: str,
                     input_doc_root: str, output_doc_root: str) -> str:
    """Rewrite relative ``.md`` links in *md_text* to permalinks.

    Every line is scanned with MD_LINK_REGEX and MD_HTML_LINK_REGEX. For each
    captured link target:

    * a target containing ``https://`` or ``http://`` is left untouched and
      recorded in ``not_modified_list`` with ``'handle': 'ignore'``;
    * a target whose path relative to *input_doc_root* is a key of
      ``MD_DOC_INDEX`` is replaced by that entry's
      ``['front matter']['permalink']`` and recorded in ``modified_list``;
    * any other target blanks the whole line and is recorded in
      ``not_modified_list`` with ``'handle': 'delete'``.

    Args:
        md_text: Full text of the markdown document.
        in_md_abs_path: Path of the document being read; relative links are
            resolved against its directory.
        out_md_abs_path: Path the converted document is written to (only
            stored in the log records here).
        input_doc_root: Root directory of the input docs; resolved links are
            made relative to it before the index lookup.
        output_doc_root: Not used by this function.

    Returns:
        The rewritten document text, lines joined by newlines.

    Raises:
        AssertionError: If a resolved link path does not contain ``.md``.
    """
    in_md_abs_pwd_path = os.path.dirname(in_md_abs_path)

    mod_lines = []
    replace_counter = 0
    failed_counter = 0

    for lineno, line in enumerate(md_text.split('\n'), start=1):
        md_link_patterns = (MD_LINK_REGEX.findall(line) +
                            MD_HTML_LINK_REGEX.findall(line))
        # A line without md links skips the loop and is appended unchanged.
        for pattern in md_link_patterns:
            if 'https://' in pattern or 'http://' in pattern:
                # Link to an md file on the web: leave it alone, just log it.
                not_modified_list.append({
                    'input_md': in_md_abs_path,
                    'output_md': out_md_abs_path,
                    'line_no': lineno,
                    'pattern': pattern,
                    'handle': 'ignore',
                })
                failed_counter += 1
                continue

            # Resolve the link against the directory of the current document
            # and collapse any "." / ".." components.
            source_md_abs_path = os.path.normpath(
                os.path.join(in_md_abs_pwd_path, pattern))
            source_md_name = os.path.basename(source_md_abs_path)
            source_md_abs_pwd_path = os.path.dirname(source_md_abs_path)
            # Path of the linked document relative to the docs root. relpath
            # gives the same answer whether or not input_doc_root ends with a
            # slash and whether the target sits in the root or a subdirectory;
            # stripping the root with str.replace() did not.
            source_md_rel_path = os.path.relpath(source_md_abs_path,
                                                 input_doc_root)
            source_md_rel_pwd_path = os.path.dirname(source_md_rel_path)

            if '.md' not in source_md_rel_path:
                print(f'ERROR: {source_md_name}')
                print(f'ERROR: {source_md_abs_path}')
                print(f'ERROR: {source_md_abs_pwd_path}')
                print(f'ERROR: {source_md_rel_path}')
                print(f'ERROR: {source_md_rel_pwd_path}')
                # Explicit raise so the sanity check survives "python -O".
                raise AssertionError(
                    f'resolved link is not a .md path: {source_md_rel_path}')

            if source_md_rel_path not in MD_DOC_INDEX:
                # Unknown target: drop the whole line and log it.
                not_modified_list.append({
                    'input_md': in_md_abs_path,
                    'output_md': out_md_abs_path,
                    'line_no': lineno,
                    'pattern': pattern,
                    'source_md_abs_path': source_md_abs_path,
                    'source_md_rel_path': source_md_rel_path,
                    'handle': 'delete',
                })
                failed_counter += 1
                line = ''
                continue

            # Known target: swap the relative link for its permalink.
            md_permalink = MD_DOC_INDEX[source_md_rel_path]['front matter'][
                'permalink']
            line = line.replace(pattern, md_permalink)

            modified_list.append({
                'input_md': in_md_abs_path,
                'output_md': out_md_abs_path,
                'line_no': lineno,
                'pattern': pattern,
                'source_md_abs_path': source_md_abs_path,
                'source_md_rel_path': source_md_rel_path,
                'permalink': md_permalink,
            })
            replace_counter += 1

        mod_lines.append(line)

    print(f'已完成：resolve_md_links(..., {in_md_abs_path}, {out_md_abs_path})'
          )  # DEBUG
    print(
        f'\t成功替换了：{replace_counter}个 md 链接，未能成功替换：{failed_counter}个。')  # DEBUG

    return '\n'.join(mod_lines)


def resolve_image_links(md_text: str, in_md_abs_path: str,
                        out_md_abs_path: str, input_doc_root: str,
                        output_doc_root: str) -> str:
    """Copy locally referenced images and point their links at the copies.

    Every line is scanned with GIF_LINK_REGEX, JPG_LINK_REGEX and
    PNG_LINK_REGEX. For each captured image path:

    * a path containing ``https://`` or ``http://`` is left untouched and
      recorded in ``not_modified_list`` with ``'handle': 'ignore'``;
    * a path that does not exist on disk blanks the whole line and is
      recorded in ``not_modified_list`` with ``'handle': 'delete'``;
    * otherwise the image is copied to
      ``<output_doc_root>/images/<directory relative to input_doc_root>/``,
      the link is rewritten to ``/images/<that directory>/<file name>`` and
      a record is appended to ``modified_list``.

    Args:
        md_text: Full text of the markdown document.
        in_md_abs_path: Path of the document being read; relative image
            paths are resolved against its directory.
        out_md_abs_path: Path the converted document is written to (only
            stored in the log records here).
        input_doc_root: Root directory of the input docs.
        output_doc_root: Directory under which ``images/...`` is created.

    Returns:
        The rewritten document text, lines joined by newlines.

    Raises:
        OSError: If a target directory cannot be created or an image cannot
            be copied.
    """
    in_md_abs_pwd_path = os.path.dirname(in_md_abs_path)

    mod_lines = []
    replace_counter = 0
    failed_counter = 0
    for lineno, line in enumerate(md_text.split('\n'), start=1):
        img_relative_path = (GIF_LINK_REGEX.findall(line) +
                             JPG_LINK_REGEX.findall(line) +
                             PNG_LINK_REGEX.findall(line))
        # A line without image links skips the loop and is appended unchanged.
        for pattern in img_relative_path:
            if 'https://' in pattern or 'http://' in pattern:
                # Image hosted on the web: leave it alone, just log it.
                not_modified_list.append({
                    'input_md': in_md_abs_path,
                    'output_md': out_md_abs_path,
                    'line_no': lineno,
                    'pattern': pattern,
                    'handle': 'ignore',
                })
                failed_counter += 1
                continue

            # Path of the original image, resolved against the directory of
            # the current document with "." / ".." collapsed (copy source).
            source_img_path = os.path.normpath(
                os.path.join(in_md_abs_pwd_path, pattern))
            if not os.path.exists(source_img_path):
                # Missing image: drop the whole line and log it.
                not_modified_list.append({
                    'input_md': in_md_abs_path,
                    'output_md': out_md_abs_path,
                    'line_no': lineno,
                    'pattern': pattern,
                    'source_img': source_img_path,
                    'handle': 'delete',
                })
                failed_counter += 1
                line = ''
                continue

            # Directory holding the original image ('.' when the path has no
            # directory part, because relpath rejects an empty path).
            img_pwd_path = os.path.dirname(source_img_path) or os.curdir
            # That directory relative to the docs root. relpath never returns
            # an absolute path, so the join with 'images' below cannot be
            # discarded the way it was when the root was stripped by
            # str.replace().
            # NOTE(review): an image outside input_doc_root yields a path
            # starting with '..', which leaves the images/ folder.
            source_img_rel_path = os.path.relpath(img_pwd_path,
                                                  input_doc_root)
            # Directory of the copied image, relative to the output root.
            vdoing_img_pwd_path = os.path.normpath(
                os.path.join('images', source_img_rel_path))
            # Directory the image is copied into.
            target_img_pwd_path = os.path.join(output_doc_root,
                                               vdoing_img_pwd_path)
            os.makedirs(target_img_pwd_path, exist_ok=True)
            shutil.copy(source_img_path, target_img_pwd_path)

            # Replace the whole captured link, not just its directory part:
            # the directory part may be empty, '.', or a substring that also
            # occurs elsewhere in the line (alt text, file name).
            new_link = '/'.join([
                '/' + vdoing_img_pwd_path,
                os.path.basename(source_img_path),
            ])
            line = line.replace(pattern, new_link)
            modified_list.append({
                'input_md': in_md_abs_path,
                'output_md': out_md_abs_path,
                'line_no': lineno,
                'pattern': pattern,
                'source_image': source_img_path,
                'target_image': vdoing_img_pwd_path,
            })
            replace_counter += 1

        mod_lines.append(line)

    print(f'已完成：resolve_image_links(..., {in_md_abs_path}, {out_md_abs_path})'
          )  # DEBUG
    print(f'\t成功替换了：{replace_counter}个 image 链接，未能成功替换：{failed_counter}个。'
          )  # DEBUG

    return '\n'.join(mod_lines)


def copy_md(json_list: dict, input_doc_root, output_doc_root):
    """Convert every indexed markdown file and write it to the output tree.

    For each ``md_path -> entry`` pair in *json_list* the file
    ``<input_doc_root>/<md_path>`` is read, passed through
    resolve_md_links() and resolve_image_links(), prefixed with a front
    matter block built from ``entry['front matter']`` and written to
    ``<output_doc_root>/<entry['target_path']>``.

    Args:
        json_list: Mapping from an input-relative markdown path to a dict
            with at least the keys ``'target_path'`` and ``'front matter'``.
        input_doc_root: Root directory of the input docs.
        output_doc_root: Root directory of the generated docs.

    Raises:
        AssertionError: If an indexed markdown file does not exist.
    """
    for md_path, json_dict in json_list.items():
        input_md_path = os.path.join(input_doc_root, md_path)
        output_md_path = os.path.join(output_doc_root,
                                      json_dict['target_path'])

        # Create the destination directory without going through a shell, so
        # paths containing spaces or shell metacharacters are handled.
        output_md_pwd_path = os.path.dirname(output_md_path)
        if output_md_pwd_path:
            os.makedirs(output_md_pwd_path, exist_ok=True)

        # Explicit raise (same exception type and message as the former
        # assert) so the check is not stripped under "python -O".
        if not os.path.exists(input_md_path):
            raise AssertionError(f'MD file doesn\'t exist: {input_md_path}')

        with codecs.open(input_md_path, 'r', encoding='utf-8') as inMd:
            input_md_text = inMd.read()

        # Links first, then images; both work on the text of the same input.
        output_md_text = resolve_md_links(input_md_text, input_md_path,
                                          output_md_path, input_doc_root,
                                          output_doc_root)
        output_md_text = resolve_image_links(output_md_text, input_md_path,
                                             output_md_path, input_doc_root,
                                             output_doc_root)

        # Front matter: '---', one "key: value" line per entry, then '---'
        # followed by a newline.
        front_matter_list = ['---'] + [
            f'{k}: {v}' for k, v in json_dict['front matter'].items()
        ] + ['---\n']
        front_matter_text = '\n'.join(front_matter_list)

        with codecs.open(output_md_path, 'w', encoding='utf-8') as outMd:
            outMd.write(front_matter_text)
            outMd.write(output_md_text)

def copy_md_main(input_filename, input_doc_root, output_doc_root, mod_list,
                 unfound_list):
    """Rebuild the output doc tree from a JSON index and save the link logs.

    Loads *input_filename* into the module-level ``MD_DOC_INDEX``, removes
    any existing *output_doc_root*, runs copy_md() over the index and dumps
    ``modified_list`` and ``not_modified_list`` as JSON.

    Args:
        input_filename: Path of the UTF-8 JSON index file.
        input_doc_root: Root directory of the input docs.
        output_doc_root: Root directory of the generated docs; deleted and
            recreated on every call.
        mod_list: Path the ``modified_list`` records are written to.
        unfound_list: Path the ``not_modified_list`` records are written to.

    Raises:
        OSError: If the previous output cannot be removed or a file cannot
            be read or written.
    """
    global MD_DOC_INDEX

    with codecs.open(input_filename, 'r', encoding='utf-8') as inFile:
        MD_DOC_INDEX = json.load(inFile)

    # Remove the previous output without a shell: an unquoted "rm -rf" would
    # split a path containing spaces into several targets and expand globs.
    if os.path.islink(output_doc_root) or os.path.isfile(output_doc_root):
        os.remove(output_doc_root)
    elif os.path.isdir(output_doc_root):
        shutil.rmtree(output_doc_root)

    copy_md(MD_DOC_INDEX, input_doc_root, output_doc_root)

    # Save the record lists.
    # NOTE(review): both lists are module-level and are not cleared here, so
    # records accumulate across repeated calls in one process.
    with codecs.open(mod_list, 'w', encoding='utf-8') as mlf:
        json.dump(modified_list, mlf, indent=2, ensure_ascii=False)
    with codecs.open(unfound_list, 'w', encoding='utf-8') as nmlf:
        json.dump(not_modified_list, nmlf, indent=2, ensure_ascii=False)


if __name__ == '__main__':
    # No command-line entry point is implemented: running this module
    # directly does nothing.
    pass
